# wrangle information on the plot type, ES, ...plot_info <- study1 %>%pivot_longer(2:195, names_to ="variables", values_to ="values", values_transform = as.character) %>% dplyr::filter(str_detect(variables, "plot")) %>%# we only need the rows with info on plots tidyr::separate(col = values, into =c("type", "axis", "effsize"), # separate the info into three columnssep ="_", remove = F) %>% dplyr::mutate(plot = variables, # rename variables for later jointype =paste(type, axis, sep ="_")) %>% dplyr::select(-variables, -axis)# wrangle answers to items on each pageitem_values <- study1 %>% dplyr::select(-c(topic:itemo)) %>%pivot_longer(2:169, names_to ="variables", values_to ="values", values_transform = as.character) %>% dplyr::mutate(variables =case_when( # recode variable names that have variables =="sensi_6"~"sensi_06", # accidentally been labeled variables =="acccl_6"~"acccl_06", # without zero variables =="accu3_6"~"accu3_06", variables =="accov_6"~"accov_06", variables =="diffi_6"~"diffi_06", variables =="infor_6"~"infor_06", variables =="value_6"~"value_06",TRUE~ variables )) %>% dplyr::mutate(plot =paste0("plotx_", str_sub(variables, -2, -1)), # create variable for later joinvariables =str_sub(variables, 1, -4)) %>%# rename variable names to get a data set # with one line per participant per pagepivot_wider(id_cols =c(session, plot), names_from ="variables", values_from ="values")skim(plot_info)
Data summary
Name
plot_info
Number of rows
960
Number of columns
5
_______________________
Column type frequency:
character
5
________________________
Group variables
None
Variable type: character
skim_variable
n_missing
complete_rate
min
max
empty
n_unique
whitespace
session
0
1
64
64
0
40
0
values
0
1
17
24
0
24
0
type
0
1
13
19
0
4
0
effsize
0
1
3
4
0
6
0
plot
0
1
8
8
0
24
0
skim(item_values)
Data summary
Name
item_values
Number of rows
960
Number of columns
9
_______________________
Column type frequency:
character
9
________________________
Group variables
None
Variable type: character
skim_variable
n_missing
complete_rate
min
max
empty
n_unique
whitespace
session
0
1.0
64
64
0
40
0
plot
0
1.0
8
8
0
24
0
sensi
480
0.5
59
62
0
3
0
acccl
480
0.5
1
4
0
11
0
accu3
480
0.5
1
3
0
48
0
accov
480
0.5
1
3
0
52
0
diffi
0
1.0
1
1
0
7
0
infor
0
1.0
1
1
0
7
0
value
0
1.0
1
1
0
7
0
# join the two data setsstudy1_w <-full_join(plot_info, item_values, by =c("session", "plot")) %>%# by participant and page (plot) dplyr::select(-values) %>% dplyr::mutate(rating_cl =as.numeric(acccl), # some var need to be defined asrating_u3 =as.numeric(accu3), # numeric againrating_ov =as.numeric(accov),diffi =as.numeric(diffi),infor =as.numeric(infor),value =as.numeric(value),effsize =as.numeric(effsize),effsize_cl =case_when( # there is no negative Cliff's Delta, so we have to compute # two transformations effsize >0~ (((2*pnorm(effsize/2))-1)/pnorm(effsize/2)),# transform the actual effect size Cohen's d to Cliff's Delta effsize <0~ (- (((2*pnorm(abs(effsize)/2))-1)/pnorm(abs(effsize)/2))) # transform the actual effect size Cohen's d to Cliff's Delta # and make it negative as in the item ),effsize_u3 =1-pnorm(effsize), # reverse so that it fits the direction of the U3 item# transform the actual effect size Cohen's d to Cohen's U3effsize_ov =2*pnorm(-abs(effsize) /2), # transform the actual effect size Cohen's d to overlap# actual difference of rating relative to depicted effectsize diff_cl = (rating_cl - effsize_cl)/2,# actual difference of rating relative to depicted effectsizediff_u3 = (rating_u3/100) - effsize_u3,# actual difference of rating relative to depicted effectsize diff_ov = (rating_ov/100) - effsize_ov,diffi_normed = ((diffi -1) /3) -1, # transform item to -1 to 1infor_normed = ((infor -1) /3) -1, # transform item to -1 to 1value_normed = ((value -1) /3) -1) %>%# transform item to -1 to 1group_by(session) %>%mutate(rating_ov_missconcept =median(rating_ov, na.rm = T) <68.9,rating_u3_missconcept =median(rating_u3, na.rm = T) <21.2) %>%ungroup() %>%mutate(rating_u3_filtered =ifelse(rating_u3_missconcept == T, NA, rating_u3),rating_ov_filtered =ifelse(rating_ov_missconcept == T, NA, rating_ov),diff_u3_filtered = (rating_u3_filtered/100) - effsize_u3,diff_ov_filtered = (rating_ov_filtered/100) - effsize_ov,sensi_binary =ifelse(is.na(sensi), # 1 if NOT "equal"NA,as.numeric(!grepl("equal", sensi))),sensi_ordinal =ordered(factor(substr(sensi, 55, 100)),levels =c("inferior","equal","superior")),sensi_binary_filtered =case_when(sensi_ordinal =="equal"~0, (sensi_ordinal =="inferior"& effsize <0) | (sensi_ordinal =="superior"& effsize >0) ~as.numeric(NA), (sensi_ordinal =="inferior"&# was not there effsize >0) | (sensi_ordinal =="superior"& effsize <0) ~1, TRUE~as.numeric(NA)), # was 1sensi_correct =case_when(sensi_ordinal =="equal"~"judged equal", (sensi_ordinal =="inferior"& effsize <0) | (sensi_ordinal =="superior"& effsize >0) ~"wrong direction", (sensi_ordinal =="inferior"&# was not there effsize >0) | (sensi_ordinal =="superior"& effsize <0) ~"right direction", TRUE~NA_character_),effsize_abs =abs(effsize))skim(study1_w)
sociodemographics <-read_csv("data/teachers_study1a.csv") %>%select(session, mcstu, texpe) %>%# mcsubfilter(!is.na(mcstu & texpe)) %>%mutate(reply = session %in%c(study1_w$session)) %>%filter(!reply =="FALSE") %>%select(-reply) %>%mutate(mcstu =as.factor(mcstu),texpe =as.numeric(texpe))skim(sociodemographics) # n = 15 participants checked "others" when asked which school type they teach at
# create a list of u3_misconceptualizersu3_misconceptualizers <- study1_w %>%filter(rating_u3_missconcept == T) %>%pull(session) %>%unique()# create a list of ov_misconceptualizersov_misconceptualizers <- study1_w %>%filter(rating_ov_missconcept == T) %>%pull(session) %>%unique() ### wrangle time stamp data ####################################################study1_w_timestamp <-read_csv(here("data/teachers_study1_N40_detailed.csv")) %>%# filter participants from study1_w onlyfilter(session %in% study1_w$session) %>%# we only need vars sensitivity or accuracy dplyr::filter(str_detect(item_name, "sensi|acccl|accu3|accov")) %>%# create var with plot numbermutate(plot =paste0("plotx_", str_sub(item_name, -2, -1)),# recode wrong item labellingplot =ifelse(plot =="plotx__6", "plotx_06", plot)) %>%relocate(session, plot) %>%# delete the page number in item namemutate(item_name =str_sub(item_name, 1, 5)) %>%pivot_wider(id_cols =c(session, plot), names_from = item_name, values_from = answered_relative) %>%rowwise() %>%# what was the time of the first item to be clicked?mutate(effic =min(sensi, acccl, accu3, accov, na.rm=T)) %>%ungroup() %>% dplyr::select(session, plot, effic, sensi, acccl, accu3, accov) %>%left_join(., study1_w %>%select(session, plot, type), by=c("session", "plot")) %>%# generate data set so that the six plots from the same type are ordered# one after the other (and not 1-24)group_by(session, type) %>%arrange(plot) %>%mutate(plotNrWithin =1:n()) %>%ungroup() %>%group_by(plotNrWithin, type) %>%mutate(effic_10righttrunc =ifelse(effic >quantile(effic, .9), NA, effic),effic_05righttrunc =ifelse(effic >quantile(effic, .95), NA, effic),log_effic_05righttrunc =log(effic_05righttrunc),log_effic_10righttrunc =log(effic_10righttrunc),plotNrWithin0 = plotNrWithin -1,plotNrWithin_factor =as.factor(plotNrWithin)) %>%ungroup()skim(study1_w_timestamp)
Somewhat disturbing is the first mode in rating_ov. Maybe some users confused overlap and non-overlap? Another artifact seems to be the first mode in rating_u3.
Are there Differences between particpants that indicated a specific school type and those that indicated “others”?
### grouped by plot type study1_w_demo %>%group_by(type) %>%do(tau_u3 =unlist(cor(.$effsize, .$rating_u3, method ="kendall", use ="pairwise.complete"))) %>%unnest(tau_u3)
### grouped by school typestudy1_w_demo %>%group_by(schooltype_binary) %>%do(tau_u3a =unlist(cor(.$effsize, .$rating_u3, method ="kendall", use ="pairwise.complete"))) %>%unnest(tau_u3a)
### grouped by school type and plot typestudy1_w_demo %>%group_by(schooltype_binary, type) %>%do(tau_u3 =unlist(cor(abs(.$effsize), .$rating_u3, method ="kendall", use ="pairwise.complete"))) %>%unnest(tau_u3)
### grouped by plot type study1_w_demo %>%group_by(type) %>%do(tau_ov =unlist(cor(abs(.$effsize), .$rating_ov, method ="kendall", use ="pairwise.complete"))) %>%unnest(tau_ov)
### grouped by school typestudy1_w_demo %>%group_by(schooltype_binary) %>%do(tau_ov =unlist(cor(abs(.$effsize), .$rating_ov, method ="kendall", use ="pairwise.complete"))) %>%unnest(tau_ov)
### grouped by school type and plot type study1_w_demo %>%group_by(schooltype_binary, type) %>%do(tau_ov =unlist(cor(abs(.$effsize), .$rating_ov, method ="kendall", use ="pairwise.complete"))) %>%unnest(tau_ov)
### grouped by school typestudy1_w_demo %>%group_by(schooltype_binary) %>%do(tau_cl =unlist(cor(.$effsize, .$rating_cl, method ="kendall", use ="pairwise.complete"))) %>%unnest(tau_cl)
### grouped by school type and plot typestudy1_w_demo %>%group_by(schooltype_binary, type) %>%do(tau_cl =unlist(cor(.$effsize, .$rating_cl, method ="kendall", use ="pairwise.complete"))) %>%unnest(tau_cl)
pp_check(logreg_mod2) +theme_modern_rc() +scale_color_manual(values=viridis(2, begin = .3))
Efficiency
Visualisation
Raw data
ggplot(study1_w_timestamp, aes(as.factor(plotNrWithin), effic)) +geom_boxplot(alpha = .2, color ="lightgrey") +geom_sina(alpha = .5) +coord_cartesian(ylim =c(0,100000)) +facet_wrap(~type) +theme_modern_rc() +labs(title ="Dwell Times Until First Decision",subtitle ="Per Plot Type and Plot Repetition") +theme(strip.text =element_text(color ="white"))
5% Percent Truncated
ggplot(study1_w_timestamp, aes(as.factor(plotNrWithin), effic_05righttrunc)) +geom_boxplot(alpha = .2, color ="lightgrey") +geom_sina(alpha = .5) +coord_cartesian(ylim =c(0,85000)) +facet_wrap(~type) +theme_modern_rc() +labs(title ="5% Truncated Dwell Times Until First Decision",subtitle ="Per Plot Type and Plot Repetition") +theme(strip.text =element_text(color ="white"))
10% Percent Truncated
ggplot(study1_w_timestamp, aes(as.factor(plotNrWithin), effic_10righttrunc)) +geom_boxplot(alpha = .2, color ="lightgrey") +geom_sina(alpha = .5) +coord_cartesian(ylim =c(0,85000)) +facet_wrap(~type) +theme_modern_rc() +labs(title ="10% Truncated Dwell Times Until First Decision",subtitle ="Per Plot Type and Plot Repetition") +theme(strip.text =element_text(color ="white"))
log() Transformed
ggplot(study1_w_timestamp, aes(as.factor(plotNrWithin), log(effic_05righttrunc))) +geom_boxplot(alpha = .2, color ="lightgrey") +geom_sina(alpha = .5) +facet_wrap(~type) +theme_modern_rc() +labs(title ="log Transformed Dwell Times Until First Decision",subtitle ="Per Plot Type and Plot Repetition") +theme(strip.text =element_text(color ="white"))
Estimated Bayes factor in favor of plot01_mod2 over plot01_mod1: 1849303621179023889638260437652965887125694371592053899648852270127070606007409660944018230325746250962974478395693028045173301667410539812058979220011876352.00000
Arslan, Ruben C., Matthias P. Walther, and Cyril S. Tata. 2020. “Formr: A Study Framework Allowing for Automated Feedback Generation and Complex Longitudinal Experience-Sampling Studies Using R.”Behavior Research Methods 52 (1): 376–87. https://doi.org/10.3758/s13428-019-01236-y.